#! /bin/env python 
'''
Created on Oct 28, 2012

@author: Shunping Huang
'''

import os
import pysam
import argparse as ap 
import logging
from modtools.mod import Mod
from modtools.utils import readableFile, writableFile, validChromList
from modtools import tmpmod

from time import localtime,strftime

DESC = 'A FASTA generator for in silico genomes'
__version__ = '0.1.0'
logger = None


def seq2fasta(fp, sample, seq, chrom, width):
    '''
    Write one sequence to an open file as a single FASTA record.

    The header line has the form
    ``>CHROM chromosome:SAMPLE:CHROM:1:LENGTH:1 date:YYYYMMDD`` where the
    date is the current local date. The sequence follows, wrapped so that
    no line is longer than ``width`` characters; the stream is flushed once
    the record is complete.

    fp     -- destination object providing write() and flush()
    sample -- sample name placed in the header
    seq    -- the sequence to write (must support len() and slicing)
    chrom  -- chromosome name placed in the header
    width  -- maximum number of sequence characters per output line
    '''
    total = len(seq)
    stamp = strftime("date:%Y%m%d", localtime())
    header = '>%s chromosome:%s:%s:1:%d:1 %s\n' % (chrom, sample, chrom,
                                                   total, stamp)
    fp.write(header)

    # Slicing clamps at the end of the sequence, so the final (possibly
    # shorter) chunk needs no special handling.
    for start in range(0, total, width):
        chunk = seq[start:start + width]
        fp.write(chunk)
        fp.write('\n')

    fp.flush()


def initLogger():
    '''
    Configure the root logger and publish it as the module-level ``logger``.

    The root logger is set to DEBUG and gains one stream handler that
    passes INFO and above, formatted as
    ``[YYYY-MM-DD HH:MM:SS] name: LEVEL: message``.
    '''
    global logger

    # Build the fully configured handler first, then attach it.
    handler = logging.StreamHandler()
    handler.setLevel(logging.INFO)
    handler.setFormatter(
        logging.Formatter("[%(asctime)s] %(name)-10s: %(levelname)s: %(message)s",
                          "%Y-%m-%d %H:%M:%S"))

    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    root.addHandler(handler)
    logger = root
    
    
def defaultSampleName(modPath):
    '''
    Derive a fallback sample name from the path of a MOD file.

    The name is the base name of ``modPath`` truncated at its first '.',
    with every ':' replaced by '_'. The replacement matters because ':' is
    the field separator in the FASTA header written by seq2fasta.

    modPath -- path (or bare file name) of the MOD file
    Returns the derived sample name as a string.
    '''
    name = os.path.basename(modPath)
    # find() returns -1 when there is no '.', whereas index() would raise
    # ValueError for a file name without an extension.
    dot = name.find('.')
    if dot >= 0:
        name = name[:dot]
    # str.replace returns a new string; the result must be kept.
    return name.replace(':', '_')


if __name__ == '__main__':
    # Script entry point: read a MOD file and a reference FASTA, then write
    # one FASTA record per requested chromosome for the in silico genome.
    initLogger()

    # Parse arguments
    p = ap.ArgumentParser(description=DESC,
                          formatter_class=ap.RawTextHelpFormatter)
    group = p.add_mutually_exclusive_group()
    group.add_argument("-q", dest='quiet', action='store_true',
                       help='quiet mode')
    group.add_argument('-v', dest='verbosity', action="store_const", const=2,
                       default=1, help='verbose mode')
    p.add_argument('-c', metavar='chromList', dest='chroms',
                   type=validChromList, default=[],
                   help='a comma-separated list of chromosomes in output' +
                        ' (default: all)')
    p.add_argument('-w', metavar='width', dest='width', type=int, default=72,
                   help='the width in output FASTA  (default: 72)')
    # The help text now names the file that is actually opened by default.
    p.add_argument('-o', metavar='out.fa', dest='outfasta', type=writableFile,
                   default=None, help='the output FASTA file ' +
                   '(default: out.fa)')
    p.add_argument('mod', metavar='in.mod',
                   type=readableFile, help='an input MOD file')
    p.add_argument('infasta', metavar='in.fa',
                   type=readableFile, help='an input (reference) FASTA file')

    args = p.parse_args()

    # A zero width makes range() raise inside seq2fasta and a negative one
    # silently produces records without any sequence; reject both up front.
    if args.width < 1:
        p.error('width must be a positive integer')

    if args.quiet:
        logger.setLevel(logging.CRITICAL)
    elif args.verbosity == 2:
        logger.setLevel(logging.DEBUG)
        # initLogger leaves its stream handler at INFO, which would still
        # filter out DEBUG records; lower the handlers too so -v has effect.
        for handler in logger.handlers:
            handler.setLevel(logging.DEBUG)

    # A compromise: adding complexity but reducing unnecessary argument.
    # Bound to a new name so the imported 'tmpmod' module is not shadowed.
    tabixMod = tmpmod.getTabixMod(args.mod)
    mod = Mod(tabixMod)

    try:
        # Prefer the sample name recorded in the MOD header; otherwise fall
        # back to one derived from the MOD file name.
        sample = mod.header.get('sample')
        if sample is None:
            sample = defaultSampleName(args.mod)

        chromAliases = mod.meta.chromAliases

        # No -c option means every chromosome listed by the MOD.
        chroms = args.chroms
        if len(chroms) == 0:
            chroms = mod.chroms

        width = args.width

        # Build index on fasta
        if not os.path.isfile(args.infasta + '.fai'):
            pysam.faidx(args.infasta)

        # The first tab-separated column of each .fai line is a sequence name.
        with open(args.infasta + '.fai') as fai:
            inChroms = [line.rstrip().split('\t')[0] for line in fai]

        infasta = pysam.Fastafile(args.infasta)

        outPath = 'out.fa' if args.outfasta is None else args.outfasta
        # Text mode: seq2fasta writes str objects, which a file opened in
        # binary mode rejects on Python 3.
        with open(outPath, 'w') as outfasta:
            logger.info("input MOD file: %s (%s)", args.mod, mod.fileName)
            logger.info("input FASTA file: %s", infasta.filename)
            logger.info("output FASTA file: %s", outfasta.name)

            for outChrom in chroms:
                logger.info("processing chromosome '%s'", outChrom)
                chrom = chromAliases.getBasicName(outChrom)

                # Resolve the name under which the MOD knows this chromosome.
                modChrom = chromAliases.getMatchedAlias(chrom, mod.chroms)
                if modChrom is None:
                    logger.warning("chromosome alias not found for '%s' in MOD",
                                   outChrom)
                    modChrom = chrom
                else:
                    logger.info("chromosome alias '%s' used for '%s' in MOD",
                                modChrom, outChrom)
                mod.load(modChrom)

                logger.info("%d line(s) found in MOD", len(mod.data))
                if len(mod.data) == 0:
                    logger.warning("chromosome '%s' not found in MOD, maybe incorrect name or alias",
                                   outChrom)

                # Resolve the name under which the FASTA index knows it.
                inFastaChrom = chromAliases.getMatchedAlias(chrom, inChroms)
                if inFastaChrom is None:
                    logger.warning("chromosome alias not found for '%s' in FASTA",
                                   outChrom)
                    inFastaChrom = chrom
                else:
                    logger.info("chromosome alias '%s' used for '%s' in FASTA",
                                inFastaChrom, outChrom)

                seq = mod.getSeq(infasta, modChrom, inChroms)

                logger.info("old: %d bp -> new: %d bp",
                            mod.meta.getChromLength(inFastaChrom),
                            len(seq))

                seq2fasta(outfasta, sample, seq, outChrom, width)
    finally:
        # Clean up the temp files, even when processing failed part-way.
        os.remove(mod.fileName)
        os.remove(mod.fileName + '.tbi')

    logger.info("All Done!")